/*
Input: BTW2009-Strvz-alph-Stadtb.xlsx
	> electoral roll 2009: raw addresses of all eligible voters and their assigned precincts

Output: tmp/strassen_btw09_voll_buchstabe.dta

Main task:
	> import, clean, and identify street segments that are assigned to a given precinct 
			-> a segment is equal to a streetname + range of numbers (e.g., Mystreet 6-18)
		>> note: a maximum number is not always given (e.g., singletons and "from 6 onwards" cases) 
			-> we generate max numbers by hand (e.g., 6 -> 6-6 and "from 6" -> 6-320 (hypothetical max))
		
	> from street segments extend data to include each single address (e.g., Mystreet 6, Mystreet 8, etc.)
		-> several cases to be considered:
		0. streets without any numbers // not the case in 2017
		 . address named exactly (border cases of the segments: if 6-10a -> 6 and 10a)
		1. beginning and ending of address contain only numbers and no letters (e.g., 1-15)
		 If segments with letters, different cases:
			2. only beginning of the segment contains a letter (e.g., 16a-24)
				3. only ending of the segment contains a letter (e.g., 16-24f)
			If segments contain letter at the beginning and the ending, different cases:
			4. assignment given for exactly one address (same house number and same letter), e.g. 16a-16a
			5. assignment given for exactly one house number (but with different letters), e.g. 17b-17e
			6. first and second mentioned address contain letter and both have different numbers (e.g., 8b-11d)

*/



************************************************************************
**** 1. import, clean, and prepare for merging // very similar to 2013
************************************************************************

** PULL: raw 2009 electoral roll.
** The sheet is read in four pieces (columns A/C, D/F, G/I and J/L). In every
** piece the first column holds the street text and the second kept column the
** precinct id; the four pieces are then stacked into one (street_tmp, sb) table.
	tempfile col_1 col_2 col_3 col_4

	local roll "$rawdata/election_office/electoral_rolls/BTW2009-Strvz-alph-Stadtb.xlsx"
	// column letters of the street text and of the precinct id, piece by piece
	local text_cols A D G J
	local sb_cols   C F I L

	forvalues j = 1/4 {
		local txt : word `j' of `text_cols'
		local pid : word `j' of `sb_cols'

		// read from the first row of the text column to the end of the sheet
		import excel using "`roll'", clear cellrange(`txt'1)
		keep `txt' `pid'
		rename `txt' street_tmp
		rename `pid' sb
		save `col_`j''
	}

* stack the pieces in their original order (piece 1 first)
	clear
	forvalues j = 1/4 {
		append using `col_`j''
	}

* remove rows in which every variable is missing
	missings dropobs, force

* precinct id without embedded blanks
	replace sb = subinstr(sb, " ", "", .)
	lab var sb "raw precinct id"
	

** Identify and clean addresses
	/* Excerpt of the raw text column (street_tmp). Goal: split it into street
	   names and the street segments listed underneath each name.

	S t a d t b e z i r k

	Adelgundenstr.   		<- street name
	1  -   3   unger. 		<- street segment
	2  -   4   gerade
	ab       5   fortl.
	Albertgasse				<- street name
	Alexandrastr.

	*/

* Remove all blanks, then discard empty rows and the 'Stadtbezirk' (district) label rows
	replace street_tmp = subinstr(street_tmp, " ", "", .)
	drop if inlist(street_tmp, "", "Stadtbezirk")

* A row that contains a digit describes a street segment; every other row is a street name
	gen byte is_segment = ustrregexm(street_tmp, "(ab)?[0-9]")

	// street := raw street name; segment rows inherit the name from the row above
	gen street = cond(is_segment == 0, street_tmp, "")
	replace street = street[_n-1] if missing(street)

	// hnr_orig_segment := raw segment text (empty on street-name rows)
	gen hnr_orig_segment = cond(is_segment == 1, street_tmp, "")
	drop is_segment street_tmp

* Drop redundant entries
*	(when multiple segments are listed, the street name spans several rows and
*	 its precinct id is empty => those rows carry no assignment)
	drop if missing(sb)

* District ID: the precinct id without its last two characters
	gen stadtbez = substr(sb, 1, strlen(sb) - 2)
	lab var stadtbez "city district ID"

* Clean some (long) street names (checked manually).
* Exact-match lookup: left = full street name to use, right = spelling as it
* appears in `street` at this point (blanks were already stripped from the raw
* text, so the right-hand spellings contain no spaces).
* Every rule only fires on an exact match of the whole name.
	replace street="Hildegard-von-Bingen-Anger" 	if street=="Hild.-v.-Bingen-Ang."
	replace street="Rondell Neuwittelsbach" 		if street=="Rond.Neuwittelsbach"
	replace street="Franz-Josef-Strauß-Ring" 		if street=="F.-J.-Strauß-Ring"
	replace street="Hedwig-Dransfeld-Allee" 		if street=="Hedwig-Dransfeld-A."
	replace street="Karl-Amadeus-Hartmann-Weg" 		if street=="Karl-A.-Hartmann-Weg"
	replace street="Mies-van-der-Rohe-Str." 		if street=="Mies-van-der-Rohe-St"
	replace street="Geschwister-Scholl-Pl." 		if street=="Geschw.-Scholl-Pl."
	replace street="Walter-Klingenbeck-Weg" 		if street=="Walter-Klingenbeck-W"
	replace street="Heinrich-Stieglitz-Kehre" 		if street=="Heinr.-Stiegl.-Kehre"
	replace street="Jean-Paul-Richter-Str." 		if street=="Jean-Paul-Richter-St"
	replace street="Hermann-von-Sicherer-Str." 		if street=="Herm.v.SichererStr."
	replace street="Christoph-Rapparini-Bogen" 		if street=="Christ.-Rapparini-Bg"
	replace street="Werner-Friedmann-Bogen" 		if street=="Werner-Friedmann-Bg"
	replace street="Friedenheimer Brücke" 			if street=="FriedenheimerBrü."
	replace street="Johann-Sebastian-Bach-Str." 	if street=="Johann-Seb-Bach-Str."
	replace street="Maximilian-Wetzger-Str." 		if street=="Maximilian-Wetzger-S"
	replace street="Nördliche Auffahrtsallee" 		if street=="Ndl.-Auffahrtsallee"
	replace street="Nördliches Schloßrondell" 		if street=="Ndl.-Schloßrondell"
	replace street="Rainer-Werner-Fassbinder-Pl." 	if street=="R.-W.-Fassbinder-Pl."
	replace street="Südliche Auffahrtsallee" 		if street=="Südl.Auffahrtsallee"
	replace street="Südliches Schloßrondell" 		if street=="Suedl.Schloßrondell"
	replace street="Am Hartmannshofer Bächl" 		if street=="AmHartmannsh.Bächl"
	replace street="Ehrenbreitsteiner Str." 		if street=="EhrenbreitsteinerSt"
	replace street="Jakob-Hagenbucher-Str." 		if street=="Jak.-Hagenbucher-Str"
	replace street="Moosacher St.-Martins-Pl." 		if street=="MoosachSt-Martins-P"
	replace street="Christoph-von-Gluck-Pl." 		if street=="Christ.-von-Gluck-Pl"
	replace street="Erwin-von-Steinbach-Weg" 		if street=="Erw.v.-Steinbach-Weg"
	replace street="Martin-Luther-King-Weg" 		if street=="Mart.Luther-King-Weg"
	replace street="Werner-Seelenbinder-Weg" 		if street=="W.-Seelenbinder-Weg"
	replace street="Alois-Wolfmüller-Str." 			if street=="Alois-Wolfmueller-St"
	replace street="Christoph-Probst-Str." 			if street=="Christ.-Probst-Str."
	replace street="Elisabeth-Selbert-Str." 		if street=="Elisabeth-Selbert-St"
	replace street="Freimanner Bahnhofstr." 		if street=="FreimannerBahnhofst"
	replace street="Heinrich-Geißler-Str." 			if street=="Heinr.-Geißler-Str."
	replace street="Karl-Köglsperger-Str." 			if street=="Karl-Köglsperger-St"
	replace street="Peter-Paul-Althaus-Str." 		if street=="Peter-Paul-Althaus-S"
	replace street="Werner-Heisenberg-Allee" 		if street=="W.-Heisenberg-Allee"
	replace street="Adolf-Oberländer-Weg" 			if street=="Adolf-Oberländer-We"
	replace street="Anschütz-Kaempfe-Str." 			if street=="Anschuetz-Kaempfe-S"
	replace street="Bernhard-Lichtenberg-Weg" 		if street=="Bernh.-Lichtenberg-W"
	replace street="Else-Lasker-Schüler-Str." 		if street=="E.-Lasker-Schüler-S"
	replace street="Eugen-Kalkschmidt-Weg" 			if street=="Eug.-Kalkschmidt-Weg"
	replace street="Friedrich-Herschel-Str." 		if street=="Friedr.-Herschel-Str"
	replace street="Gertrud-von-Le-Fort-Weg" 		if street=="Gertr.-v-Le-Fort-Weg"
	replace street="Hugo-von-Hofmannsthal-Str." 	if street=="H.-v.-Hofmannsthal-S"
	replace street="Johannes-Neuhäusler-Str." 		if street=="Joh.-Neuhäusler-Str"
	replace street="Marie-Luise-Kaschnitz-Str." 	if street=="Marie-L.-Kaschnitz-S"
	replace street="Schnorr-von-Carolsfeld-Str." 	if street=="Schnorr-v-Carolsf-St"
	replace street="Wolfgang-Borchert-Weg" 			if street=="Wolfg.-Borchert-Weg"
	replace street="Zur Deutschen Einheit" 			if street=="ZurDeutsch.Einheit"
	replace street="Johann-Michael-Fischer-Pl." 	if street=="Joh.-Mich.-Fischer-P"
	replace street="Anton-Geisenhofer-Str." 		if street=="Anton-Geisenhofer-St"
	replace street="Bürgermeister-Keller-Str." 		if street=="Bürgerm.-Keller-Str"
	replace street="Elisabeth-Mann-Borgese-Str." 	if street=="Elis.-Mann-Borgese-S"
	replace street="Elisabeth-zu-Guttenberg-Str." 	if street=="Elis.-z-Guttenberg-S"
	replace street="Friedrich-Creuzer-Str." 		if street=="Friedr.-Creuzer-Str"
	replace street="Georg-Kerschensteiner-Str." 	if street=="G.-kerschensteiner-s"
	replace street="Herzogstandstr. (Gronsdorf)" 	if street=="Herzogstandstr.-Grdf"
	replace street="Maria-Montessori-Str." 			if street=="M.-Montessori-Str."
	replace street="Albert-Schweitzer-Str." 		if street=="Albert-Schweitzer-St"
	replace street="Arnold-Sommerfeld-Str." 		if street=="Arnold-Sommerfeld-St"
	replace street="Charles-de-Gaulle-Str." 		if street=="Charles-de-Gaulle-St"
	replace street="Dr.-Walther-von-Miller-Str." 	if street=="Dr-Walth.-v-Miller-S"
	replace street="Friedrich-Engels-Bogen" 		if street=="Friedr.-Engels-Bogen"
	replace street="Gerhart-Hauptmann-Ring" 		if street=="Gerh.-Hauptmann-Ring"
	replace street="Gertrud-Kückelmann-Weg" 		if street=="Gertr.-Kückelmann-W"
	replace street="Hans-Pössenbacher-Weg" 			if street=="Hans-Pössenbacher-W"
	replace street="Joseph-Maria-Lutz-Anger" 		if street=="Jos-Maria-Lutz-Anger"
	replace street="Marieluise-Fleißer-Bogen" 		if street=="Mariel.-Fleißer-Bog"
	replace street="Maximilian-Kolbe-Allee" 		if street=="Maximil.-Kolbe-Allee"
	replace street="Peter-Lühr-Str." 				if street=="Peter-Luehr-Str."
	replace street="Stephanskirchener Str." 		if street=="StephanskirchenerSt"
	replace street="Heinrich-Kröller-Str." 			if street=="Heinr.-Kröller-Str."
	replace street="Alpspitzstr. (Pullach)" 		if street=="Alpspitzstr.-Pullach"
	replace street="Höllriegelskreuther Str." 		if street=="Hoellriegelskr.Str."
	replace street="Vinzenz-Schüpfer-Str." 			if street=="Vinzenz-Schüpfer-St"
	replace street="Holzapfelkreuther Str." 		if street=="HolzapfelkreutherSt"
	replace street="Ludwig-Wörl-Weg" 				if street=="Ludwig-Woerl-Weg"
	replace street="Annelies-Kupper-Allee" 			if street=="Annelies-Kupper-Alle"
	replace street="Philippine-Schick-Allee" 		if street=="Philipp.-Schick-Alle"
	replace street="Richard-Riemerschmid-Allee" 	if street=="Rich-Riemerschmid-Al"
	replace street="Am Aubinger Wasserturm" 		if street=="AmAubingerWassert."
	replace street="Centa-Hafenbrädl-Str." 			if street=="Centa-Hafenbrädl-St"
	replace street="Friedrichshafener Str." 		if street=="FriedrichshafenerSt"
	replace street="Josef-Steinbacher-Weg" 			if street=="Jos.-Steinbacher-Weg"
	replace street="Margarete-Kliemann-Weg" 		if street=="Marg.-Kliemann-Weg"
	replace street="Walter-Schnackenberg-Weg" 		if street=="Walt-Schnackenberg-W"
	replace street="Albert-Pfretzschner-Weg" 		if street=="Albert-Pfretzschn.-W"
	replace street="Ernst-von-Romberg-Str." 		if street=="Ernst-von-Romberg-St"
	replace street="Georg-Reismüller-Str." 			if street=="Gg.-Reismüller-Str."
	replace street="Nikolaus-Rüdinger-Str." 		if street=="Nikol.-Rüdinger-Str"
	replace street="Peter-Stegmüller-Weg" 			if street=="Peter-Stegmüller-Wg"
	replace street="Auf den Schrederwiesen" 		if street=="Aufd.Schrederwiesen"
	replace street="Berthold-Litzmann-Str." 		if street=="Berthold-Litzmann-St"
	replace street="Ignatius-Blenninger-Str." 		if street=="Ignat.-Blenninger-St"
	replace street="Ludwig-Gramminger-Str." 		if street=="Ludw.-Gramminger-Str"
	replace street="Paula-Breitenbach-Weg" 			if street=="Paula-Breitenbach-Wg"
	replace street="Steinröschenstr." 				if street=="Steinroeschenstr."
	replace street="Wilhelmine-Reichard-Str." 		if street=="Wilhelm.-Reichard-S."
	replace street="Johannes-Scharrer-Str." 		if street=="Johannes-Scharrer-St"
	replace street="Friedrich-Brugger-Weg" 			if street=="Friedr.-Brugger-Weg"
	replace street="Herzog-Albrecht-Anlage" 		if street=="Herzog-Albrecht-Anl."
	replace street="Johann-Karg-Straße (Haar)" 		if street=="Joh.-Karg-Str.(Haar)"
	replace street="Melitta-Bentz-Straße" 			if street=="Melitta-Benz-Str."
	replace street="Roßsteinstraße" 				if street=="Rosssteinstr."
	replace street="Wolfratshauser Straße (Pul)" 	if street=="Wolfratsh.Str.(Pul)"
	replace street="Jürgen-von-Hollander-Platz" 	if street=="J.-v.-Hollander-Pl."
	replace street="Karl-Heinrich-Ulrichs-Platz" 	if street=="Karl-H.-Ulrichs-Pl."
	replace street="Ludwig-Hilberseimer-Straße" 	if street=="Ludw--Hilberseimer-S"
	replace street="Magdalena-Bräu-Weg" 			if street=="Magdalena-Braeu-Weg"
	replace street="Margarete-Schütte-Lihotzky-Straße" if street=="Mar.-Schuette-Lih.-S"
	replace street="Marianne-von-Werefkin-Weg" 		if street=="Mar.-v.-Werefkin-W."
	replace street="Maria-Goeppert-Mayer-Straße" 	if street=="M.-Goeppert-Mayer-St"
	replace street="Paul-Heyse-Unterführung" 		if street=="Paul-Heyse-Unterfü."
	replace street="Pfarrer-Rosenberger-Straße" 	if street=="Pfarr.-Rosenberger-S"
	replace street="Platz der Opfer des Nationalsozialismus" if street=="Pl-d-Opferd-Nat.Soz"
	replace street="Resi-Huber-Platz" 				if street=="RESI-HUBER-PL."
	replace street="Rudi-Hierl-Platz" 				if street=="RUDI-HIERL-PL."
	replace street="Schneiderhofstraße (Gronsdorf)" if street=="Schneiderhofstr.(GR)"
	replace street="Schwester-Adelmunda-Weg" 		if street=="Schwest.-Adelmunda-W"
	replace street="Siegfried-Mollier-Straße" 		if street=="Siegfr.-Mollier-Str."
	replace street="Thomas-Theodor-Heine-Weg" 		if street=="Thomas-Th.-Heine-Weg"
	replace street="Wilhelm-Dörpfeld-Weg" 			if street=="Wilh.-Dörpfeld-Weg"
	replace street="Wilhelm-Hausenstein-W." 		if street=="Wilh.-Hausenstein-W."
	replace street="Wolfgang-Früchtl-Straße" 		if street=="Wolfg.-Früchtl-Str."
	replace street="Gottfried-Koelwel-Weg" 			if street=="Gottfr.-Koelwel-Weg"
	replace street="Alois-Johann-Lippl-Weg" 		if street=="Alois-Joh.-Lippl-Weg"
	replace street="Sämannstraße (Gräfelfing)" 		if street=="Saemannstr.(Graef.)"
	replace street="Landaubogen" 					if street=="Leonhard-Moll-Bogen" // renamed by the city for historical reasons between 2014 and 2017; mapped to the new name
	replace street="Ilse-Weber-Str." 				if street=="Paul-Lagarde-Str." // renamed by the city for historical reasons between 2014 and 2017; mapped to the new name
	replace street="Brunnthaler Weg" 				if street=="Friedrich-Berber-Weg" // renamed by the city between 2014 and 2017; mapped to the new name

*Adjust street names for geocoding: spell out the abbreviations used in the roll.
* The generic rules are plain substring replacements applied in the order given;
* each rule sees the output of the rules above it.
* NOTE(review): short patterns such as "d.", "b.", "B." or "A." match anywhere in
* a name, not only where they are an abbreviation - confirm against the raw list
* when a new year of data is added.

* Manual fixes first: both raw spellings contain "d." (and "A."), so they have to
* be matched before the generic rules below rewrite them. (After the generic
* rules the raw spellings no longer exist and these two lines could never fire.)
	replace street="An der Streicherbreiten" if street=="A.d.Streicherbreiten"
	replace street="An der Hauptfeuerwache"  if street=="And.Hauptfeuerwache"

* Generic abbreviations, replaced wherever they occur in the name
	replace street = subinstr(street, "Kard.", "Kardinal", .)
	replace street = subinstr(street, "d.", "der", .)
	replace street = subinstr(street, "Alb.", "Albert", .)
	replace street = subinstr(street, "b.", "burger", .)
	replace street = subinstr(street, "-Bog", "Bogen", .)
	replace street = subinstr(street, "B.", "Bogen", .)
	replace street = subinstr(street, "Br.", "Brücke", .)
	replace street = subinstr(street, "Bog.", "Bogen", .)
	// repair "Bogen" + "en" produced when "-Bog" matched inside "-Bogen"
	replace street = subinstr(street, "Bogenen", "Bogen", .)
	replace street = subinstr(street, "Wink.", "Winkel", .)
	replace street = subinstr(street, "A.", "An", .)
	replace street = subinstr(street, "-S.", "-Str.", .)
	replace street = subinstr(street, "-Rg.", "Ring", .)
	replace street = subinstr(street, "-Rg", "Ring", .)
	replace street = subinstr(street, "v.", "von", .)
	replace street = subinstr(street, "-v-", "von", .)

* Abbreviations that only count at the END of the name.
* Only the trailing characters tested in the if-condition are rewritten. A
* name-wide subinstr() would also rewrite every other occurrence of the same
* letters (e.g. a name that starts with "St" and ends in "St").
	replace street = substr(street, 1, strlen(street)-2) + "Weg"    if substr(street, strlen(street)-1, 2) =="Wg"
	replace street = substr(street, 1, strlen(street)-2) + "Weg"    if substr(street, strlen(street)-1, 2) =="W."
	replace street = substr(street, 1, strlen(street)-1) + "Weg"    if substr(street, strlen(street), 1) =="W"
	replace street = substr(street, 1, strlen(street)-2) + "weg"    if substr(street, strlen(street)-1, 2) =="wg"
	replace street = substr(street, 1, strlen(street)-1) + "Platz"  if substr(street, strlen(street), 1) =="P"
	replace street = substr(street, 1, strlen(street)-2) + "Straße" if substr(street, strlen(street)-1, 2) =="St"


*Further change street names for geocoding
* Each statement applies two substring rules; the inner subinstr() runs first,
* so the overall rule order is: St., Südl., Alle, Alleee, pl., Pl., str., Str.,
* straße, Straße.
	replace street = subinstr(subinstr(street, "St.", "Sankt", .), "Südl.", "Südliche", .)

	// "Alle" -> "Allee" also hits names that already read "Allee"; the second rule undoes the resulting "Alleee"
	replace street = subinstr(subinstr(street, "Alle", "Allee", .), "Alleee", "Allee", .)

	replace street = subinstr(subinstr(street, "pl.", "platz", .), "Pl.", "Platz", .)
	replace street = subinstr(subinstr(street, "str.", "straße", .), "Str.", "Straße", .)

	// one common spelling: lower-case "strasse" with "ss"
	replace street = subinstr(subinstr(street, "straße", "strasse", .), "Straße", "strasse", .)

* Complete names that end in a bare "str" / "pl" (any case)
	replace street = street + "asse" if lower(substr(street, -3, 3)) == "str"
	replace street = street + "atz"  if lower(substr(street, -2, 2)) == "pl"

* Undo the "Alle" -> "Allee" rule for two names in which "Alle" is not an abbreviation
	replace street = "Allensteiner strasse" if street == "Alleensteinerstrasse"
	replace street = "Allescherstrasse"     if street == "Alleescherstrasse"

	
* gen: merge_strasse := harmonized street name
* (titles spelled out, no blanks, lower case, no punctuation, ß and umlauts transliterated)
	rename street merge_strasse
	lab var merge_strasse "harmonized street id for merging"

	// spell out titles while the trailing "." is still there
	replace merge_strasse = subinstr(subinstr(merge_strasse, "Dr.", "Doktor", .), "Prof.", "Professor", .)

	// remove blanks, then lower-case
	// NOTE(review): lower() only converts ASCII letters, so an upper-case
	// Ä/Ö/Ü is not turned into ae/oe/ue below - confirm that the merge
	// partner is normalised the same way.
	replace merge_strasse = lower(subinstr(merge_strasse, " ", "", .))

	// strip hyphens, periods and apostrophes (in this order)
	replace merge_strasse = subinstr(subinstr(subinstr(merge_strasse, "-", "", .), ".", "", .), "'", "", .)

	// transliterate ß, ä, ö, ü (in this order)
	replace merge_strasse = subinstr(subinstr(subinstr(subinstr(merge_strasse, "ß", "ss", .), "ä", "ae", .), "ö", "oe", .), "ü", "ue", .)
	
* different from later years: segments not only defined by "-", but also by "from" ("ab"), "even" ("gerade"), "odd" ("unger."), "consecutive" ("fortl.")
* Each flag stores the position of its keyword inside the raw segment text (0 = keyword absent).
	gen even = strpos(hnr_orig_segment, "gerade")
	gen odd  = strpos(hnr_orig_segment, "unger.") // no cases of "unger"
	gen ab 	 = strpos(hnr_orig_segment, "ab")
	gen fort = strpos(hnr_orig_segment, "fortl.") // no cases of "fort"
	gen minus = strpos(hnr_orig_segment, "-")

* Sanity checks on the segment syntax
	// "ab" is either absent or the very first characters of the segment
	assert ab==1 | ab==0
	// the qualifiers even / odd / consecutive are mutually exclusive
	assert even==0 & odd==0 if fort>0
	assert even==0 & fort==0 if odd>0
	assert odd==0 & fort==0 if even>0
	// entries that are neither "ab ..." nor a range (whole streets and single
	// addresses) carry no qualifier at all.
	// Written as an explicit conjunction: Stata evaluates a==b==c==0 from left
	// to right as ((a==b)==c)==0, which does not test that all three are zero
	// (it is satisfied, e.g., by fort==0, even==0 and odd>1).
	assert fort==0 & even==0 & odd==0 if ab==0 & minus==0
	
** Extract address numbers from entries, e.g., 	'2-4gerade', 'ab26fortl.' etc.
*	hnr_min / hnr_max               := first / last house number of the segment
*	buchstaben_min / buchstaben_max := (optional) letter attached to that number, e.g., 44A
*	when a whole street is assigned to a precinct, hnr_orig_segment is empty

* Start of the segment (text form)
*	'from' case ("ab..."): everything after the two letters "ab"
*	'-' case             : everything before the "-"
*	'raw' case           : a single address is stated (6 instead of 6-6; this
*	                       differs from later years) -> the entry itself
	gen hnr_orig_segment1 = cond(ab==1, substr(hnr_orig_segment, 3, .), ///
	                        cond(minus==0, hnr_orig_segment, substr(hnr_orig_segment, 1, minus-1))) ///
	                        if inlist(ab, 0, 1)

	// every non-empty entry must have received a start
	assert missing(hnr_orig_segment)==missing(hnr_orig_segment1)

* End of the segment (text form): everything after the "-" (the whole entry when there is no "-")
	gen hnr_orig_segment2 = cond(ab==0, substr(hnr_orig_segment, minus+1, .), "")

	// every non-empty entry must have received an end, except 'from' entries
	assert missing(hnr_orig_segment)==missing(hnr_orig_segment2) | ab==1
	drop if hnr_orig_segment2 == "0" // house number 0 does not exist

	// no common typos: leading zero in the end, comma in the start
	assert substr(hnr_orig_segment2,1,1)!="0"
	assert strpos(hnr_orig_segment1, ",")==0

* Impute the "maximum" address number when it is not given
	// single addresses: end = start (e.g., start and end = 22)
	assert hnr_orig_segment==hnr_orig_segment1 if ab==0 & minus==0
	replace hnr_orig_segment2 = hnr_orig_segment if ab==0 & minus==0

	// open-ended ('from') segments: assume the highest possible number,
	// 320 for even-only segments, 321 for odd-only and consecutive ones
	assert missing(hnr_orig_segment2) if missing(hnr_orig_segment)
	replace hnr_orig_segment2 = cond(even>0, "320", "321") ///
		if missing(hnr_orig_segment2) & (even>0 | odd>0 | fort>0)

* Reduce both ends to "number + optional letter" (this still leaves cases such as 44A)
	// remove the qualifier keywords
	foreach part of varlist hnr_orig_segment1 hnr_orig_segment2 {
		replace `part' = subinstr(subinstr(subinstr(`part', "gerade", "", .), "unger.", "", .), "fortl.", "", .)
	}

	// numeric part = first run of digits (e.g., 44 in 44A)
	gen hnr_min = real(ustrregexs(1)) if ustrregexm(hnr_orig_segment1,"([0-9]+)")
	gen hnr_max = real(ustrregexs(1)) if ustrregexm(hnr_orig_segment2,"([0-9]+)")

	// letter part = what is left once the number is removed (e.g., A in 44A), in lower case;
	// the numbers are needed as text for subinstr() and are turned back into numbers afterwards
	tostring hnr_min hnr_max, replace
	gen buchstaben_min = lower(subinstr(hnr_orig_segment1, hnr_min, "", .))
	gen buchstaben_max = lower(subinstr(hnr_orig_segment2, hnr_max, "", .))
	destring hnr_min hnr_max, replace

	// helper variables are no longer needed
	drop hnr_orig_segment1 hnr_orig_segment2
	
************************************************************************
 // 2. start generating addresses //
************************************************************************
// The cases are handled one by one, each in a scratch copy of the data (frame tmp)
frame copy default tmp, replace
frame tmp {
*case0: streets without numbers -> the whole street belongs to the precinct
	keep if missing(hnr_orig_segment)
	keep stadtbez sb merge_strasse

* one row per house number 1..321
	expand 321 // 321 is chosen as no street contains a higher number (checked)
	bys stadtbez merge_strasse: assert _N==321
	bys stadtbez merge_strasse: gen hnr = _n

* 27 rows per house number: copies 1..26 become the letters a..z, copy 27 is the
* plain number without a letter
	expand 27
	bys stadtbez merge_strasse hnr: gen byte letter_no = _n
	gen str2 buchstabe = cond(letter_no == 27, "", char(96 + letter_no)) // char(97) is "a"
	drop letter_no

*save tempfile (appended below)
	tempfile full_buchstabe
	save `full_buchstabe'
}



* Whole-street entries are done; everything left must have a numeric start and end
	drop if missing(hnr_orig_segment)
	assert !missing(hnr_min) & !missing(hnr_max)

* Open-ended ('ab') segments that start above the assumed maximum: end number set by hand.
* Each entry reads: raw segment text, harmonized street name, end number.
	foreach fix in ///
		"ab333unger. fuerstenriederstrasse 337" ///
		"ab365unger. riemerstrasse 425" ///
		"ab542gerade dachauerstrasse 570" ///
		"ab384fortl. balanstrasse 394" ///
		"ab340gerade hoeglwoertherstrasse 386" ///
		"ab345unger. hoeglwoertherstrasse 391" ///
		"ab515unger. landsbergerstrasse 529" ///
		"ab400fortl. feldmochingerstrasse 443" ///
		"ab641unger. dachauerstrasse 667" ///
		"ab332gerade lerchenauerstrasse 334" ///
		"ab335unger. lerchenauerstrasse 345" {
		gettoken seg_txt rest : fix
		gettoken str_name top_nr : rest
		replace hnr_max = `top_nr' if hnr_orig_segment=="`seg_txt'" & merge_strasse=="`str_name'"
	}

* after the corrections no segment may end before it starts
	assert hnr_min<=hnr_max

frame copy default tmp, replace
frame tmp {
*case.x: addresses named exactly, i.e. the two ends of every segment (6-10a -> 6 and 10a)
* two copies per segment: copy 1 becomes the start, copy 2 the end
	expand 2
	local seg_key stadtbez sb merge_strasse hnr_orig_segment
	bys `seg_key': gen byte which_end = _n
	// more than two copies would mean the same segment is listed twice
	assert which_end <= 2

* house number and letter of the respective end
	gen hnr = cond(which_end==1, hnr_min, cond(which_end==2, hnr_max, .))
	gen buchstabe = cond(which_end==1, buchstaben_min, buchstaben_max)
	drop which_end

* keep relevant variables
	keep merge_strasse hnr buchstabe sb stadtbez

* start and end coincide for single addresses -> keep one copy
	bys merge_strasse hnr buchstabe sb stadtbez: drop if _n > 1

*save tempfile (appended below)
	tempfile exact
	save `exact'
}


// Split every segment into its house numbers hnr_min..hnr_max (one row each).
// What is still missing afterwards are the letters that may belong to an address.

* Tag every segment row before it is duplicated. Numbering the copies within
* this tag (instead of within street + segment text only) keeps the numbers
* inside hnr_min..hnr_max even if the same segment text is listed under more
* than one precinct of a district.
	gen long seg_id = _n

* one copy per house number contained in the segment
	gen n_numbers = hnr_max - hnr_min + 1
	expand n_numbers
	drop n_numbers

* house number of each copy: hnr_min, hnr_min+1, ..., hnr_max
	bys stadtbez merge_strasse hnr_orig_segment seg_id: gen hnr = hnr_min + _n - 1
	drop seg_id

* odd-only / even-only segments: remove the numbers of the wrong parity
	drop if odd>0 & mod(hnr, 2) == 0
	drop if even>0 & mod(hnr, 2) == 1

* the raw segment text is no longer needed
	drop hnr_orig_segment

	
frame copy default tmp, replace
frame tmp {
*case1: neither the start nor the end of the segment carries a letter
	keep if missing(buchstaben_min) & missing(buchstaben_max)

* 27 rows per house number: copies 1..26 become the letters a..z, copy 27 is the
* plain number without a letter
	expand 27
	local cell stadtbez merge_strasse hnr ab even odd fort minus hnr_min hnr_max
	// each house number may come from one segment only
	bys `cell': assert(_N==27)
	bys `cell': gen byte letter_no = _n
	gen str2 buchstabe = cond(letter_no == 27, "", char(96 + letter_no)) // char(97) is "a"
	drop letter_no

* keep only relevant variables
	drop ab even odd fort minus hnr_min hnr_max buchstaben_min buchstaben_max

*save tempfile (appended below)
	tempfile no_buchstabe
	save `no_buchstabe'
}


* case 1 is done: remove it from the working data
	drop if missing(buchstaben_min) & missing(buchstaben_max)

frame copy default tmp, replace
frame tmp {
*case 2: only the start of the segment carries a letter (buchstaben_min), e.g. 16a-24
	keep if !missing(buchstaben_min) & missing(buchstaben_max)

* tmp_buch := position of the start letter in the alphabet (a=1, ..., z=26);
* it stays missing when buchstaben_min is not exactly one letter a-z
	local abc "abcdefghijklmnopqrstuvwxyz"
	gen byte tmp_buch = strpos("`abc'", buchstaben_min) ///
		if strlen(buchstaben_min)==1 & strpos("`abc'", buchstaben_min)>0

* 27 rows per house number: 0 = plain number, 1..26 = letters a..z
	expand 27
	local cell stadtbez merge_strasse hnr ab even odd fort minus hnr_min hnr_max
	bys `cell': assert(_N==27)
	bys `cell': gen byte letter_no = _n - 1

* at the first house number, everything before the start letter (including the
* plain number) lies outside the segment
* NOTE: a missing tmp_buch compares as larger than any number, so in that case
* all rows of the first house number are removed here
	drop if letter_no < tmp_buch & hnr==hnr_min

* turn the position back into a letter ("" for the plain number)
	gen str2 buchstabe = cond(letter_no == 0, "", char(96 + letter_no)) // char(97) is "a"
	drop letter_no

* keep only relevant variables
	drop ab even odd fort minus hnr_min hnr_max buchstaben_min buchstaben_max tmp_buch

*save tempfile (appended below)
	tempfile buchstabe_min
	save `buchstabe_min'
}


* ----------------------------------------------------------------------
* case 3: only the ending of the segment contains a letter (e.g., 16-24f)
*   works on a copy of the default frame, so the default frame is untouched
*   result: one row per house number and letter suffix, saved as tempfile
*           buchstabe_max (appended below)
* ----------------------------------------------------------------------
frame copy default tmp, replace
frame tmp {
	keep if missing(buchstaben_min) & missing(buchstaben_max)==0

* numeric position of the closing letter (a=1, ..., z=26)
* anything that is not exactly one lowercase letter a-z ends up missing
	gen byte tmp_buch = strpos("abcdefghijklmnopqrstuvwxyz", buchstaben_max) if strlen(buchstaben_max)==1
	replace tmp_buch = . if tmp_buch==0

* create 27 copies per row: index 0 = no letter, 1-26 = a-z
* the assert makes sure every by-group held exactly one row before expanding
	expand 27
	bys stadtbez merge_strasse hnr ab even odd fort minus hnr_min hnr_max: assert(_N==27)
	bys stadtbez merge_strasse hnr ab even odd fort minus hnr_min hnr_max: gen byte buch_idx = _n - 1

* at the last house number of the segment drop the copies AFTER the mentioned letter
* all other house numbers keep all 27 variants
* a missing tmp_buch drops nothing because no index exceeds missing
	drop if buch_idx > tmp_buch & hnr==hnr_max

* recode the numeric index to the letter again (index 0 stays empty)
	gen str1 buchstabe = ""
	replace buchstabe = substr("abcdefghijklmnopqrstuvwxyz", buch_idx, 1) if buch_idx >= 1

* keep only relevant variables
	drop ab even odd fort minus hnr_min hnr_max buchstaben_min buchstaben_max tmp_buch buch_idx

* save tempfile (appended below)
	tempfile buchstabe_max
	save `buchstabe_max'
}


* cases 1-3 are done: from here on the default frame only holds segments
* that carry a letter at the beginning AND at the ending
	drop if missing(buchstaben_min) | missing(buchstaben_max)


frame copy default tmp, replace
frame tmp {
* case 4: assignment given for exactly one address
*         (same house number and same letter on both ends, e.g., 16a-16a)
	drop if hnr_min!=hnr_max | buchstaben_min!=buchstaben_max

* the single letter of the segment is the letter of the address
	gen buchstabe = buchstaben_min

* keep only relevant variables
	drop buchstaben_min buchstaben_max hnr_min hnr_max ab even odd fort minus

* save tempfile (appended below)
	tempfile one_address
	save `one_address'
}

* ----------------------------------------------------------------------
* case 5: assignment given for exactly one house number but a range of
*         letters (e.g., 17b-17e)
*   works on a copy of the default frame, so the default frame is untouched
*   result: one row per letter within the range, saved as tempfile
*           one_number (appended below)
* ----------------------------------------------------------------------
frame copy default tmp, replace
frame tmp {
	keep if hnr_min==hnr_max & buchstaben_min!=buchstaben_max

* numeric position (a=1, ..., z=26) of the first letter (buchstaben_min)
* anything that is not exactly one lowercase letter a-z ends up missing
	gen byte tmp_buch = strpos("abcdefghijklmnopqrstuvwxyz", buchstaben_min) if strlen(buchstaben_min)==1
	replace tmp_buch = . if tmp_buch==0

* numeric position (a=1, ..., z=26) of the second letter (buchstaben_max)
	gen byte tmp_buch2 = strpos("abcdefghijklmnopqrstuvwxyz", buchstaben_max) if strlen(buchstaben_max)==1
	replace tmp_buch2 = . if tmp_buch2==0

* create 26 copies per row: index 1-26 = a-z (no empty suffix in this case)
* the assert makes sure every by-group held exactly one row before expanding
	expand 26
	bys stadtbez merge_strasse hnr ab even odd fort minus hnr_min hnr_max: assert(_N==26)
	bys stadtbez merge_strasse hnr ab even odd fort minus hnr_min hnr_max: gen byte buch_idx = _n

* keep only the letters between the first and the second mentioned letter
* note: missing sorts above every number, so a missing tmp_buch drops all
*       copies of that row, while a missing tmp_buch2 drops none
	drop if buch_idx < tmp_buch
	drop if buch_idx > tmp_buch2

* recode the numeric index to the letter again
	gen str1 buchstabe = substr("abcdefghijklmnopqrstuvwxyz", buch_idx, 1)

* keep only relevant variables
	drop ab even odd fort minus hnr_min hnr_max buchstaben_min buchstaben_max tmp_buch tmp_buch2 buch_idx

* save tempfile (appended below)
	tempfile one_number
	save `one_number'
}

* drop cases dealt with so far (cases 4 and 5: segments with a single house number)
	drop if hnr_min==hnr_max


* last remaining case (case 6): first and second mentioned address contain a
* letter and both have different numbers (e.g., 8b-11d)
* this case is expanded directly in the default frame; the other cases are
* appended to it afterwards

* numeric position (a=1, ..., z=26) of the first letter (buchstaben_min)
* anything that is not exactly one lowercase letter a-z ends up missing
	gen byte tmp_buch = strpos("abcdefghijklmnopqrstuvwxyz", buchstaben_min) if strlen(buchstaben_min)==1
	replace tmp_buch = . if tmp_buch==0

* numeric position (a=1, ..., z=26) of the second letter (buchstaben_max)
	gen byte tmp_buch2 = strpos("abcdefghijklmnopqrstuvwxyz", buchstaben_max) if strlen(buchstaben_max)==1
	replace tmp_buch2 = . if tmp_buch2==0

* create 27 copies per row: index 0 = no letter, 1-26 = a-z
* the assert makes sure every by-group held exactly one row before expanding
	expand 27
	bys stadtbez merge_strasse hnr ab even odd fort minus hnr_min hnr_max: assert(_N==27)
	bys stadtbez merge_strasse hnr ab even odd fort minus hnr_min hnr_max: gen byte buch_idx = _n - 1

* first house number: drop the copies before the first mentioned letter
* last house number:  drop the copies after the second mentioned letter
* house numbers in between keep all 27 variants
* note: missing sorts above every number, so a missing tmp_buch drops all
*       copies at the first house number, while a missing tmp_buch2 drops none
	drop if buch_idx < tmp_buch & hnr_min==hnr
	drop if buch_idx > tmp_buch2 & hnr_max==hnr

* recode the numeric index to the letter again (index 0 stays empty)
	gen str1 buchstabe = ""
	replace buchstabe = substr("abcdefghijklmnopqrstuvwxyz", buch_idx, 1) if buch_idx >= 1

* keep only relevant variables
	drop ab even odd fort minus hnr_min hnr_max buchstaben_min buchstaben_max tmp_buch tmp_buch2 buch_idx
  
* append all datasets created before
* the case-6 rows are in memory; the tempfiles hold the other expansion cases
* (full_buchstabe and no_buchstabe are created before the part of the file
*  shown here -- presumably the number-only and the number-less cases; confirm)
	append using `full_buchstabe'
	append using `no_buchstabe'
	append using `buchstabe_min'
	append using `buchstabe_max'
	append using `one_address'
	append using `one_number'

* merge exact case (e.g., 4a in ab4a or 6a and 8b in 6a-8b) and drop merged addresses
* they might occur a second time with a different precinct due to partly imprecise electoral rolls
*   m:1        -> the exact tempfile must be unique on the five key variables
*   assert(1 3)-> stop if an exact address has no counterpart in the data in memory
*   keep(1)    -> keep only the rows in memory that did NOT match an exact address
* NOTE(review): sb is part of the merge key, so only rows with the SAME precinct
*   as the exact address are removed here. The same address listed under a
*   different precinct stays in memory, and the across-precinct rule further
*   down then drops both copies, including the exact one. Confirm this is intended.
	merge m:1 stadtbez sb merge_strasse hnr buchstabe using `exact', assert(1 3) keep(1) nogen
* append these exact cases, where the correct precinct is known for sure
	append using `exact'	
	
* generate address number (number and letter)
* hnr is converted to string so that it can be concatenated with the letter
* NOTE(review): tostring should render a missing hnr as "." -- confirm that no
*   rows without a house number reach this point
	tostring hnr, replace 
	gen nummer = hnr + buchstabe
	lab var nummer "address number"

* destring precinct and district ID
	destring stadtbez sb, replace
	*tostring stadtbez sb, replace

* Handle duplicate addresses (the two steps run in this order):
	* first:  duplicates within the same precinct (sb): keep one copy
	* second: addresses still occurring more than once within a district, i.e.
	*         across precincts: drop ALL copies b/c definitive assignment to a
	*         precinct is not possible
	duplicates drop stadtbez merge_strasse nummer sb, force
	bys stadtbez merge_strasse nummer: keep if _N==1

* keep only necessary variables
	keep merge_strasse nummer sb stadtbez

* save the address-level file (one row per district, street and address number)
	save "$tmp/strassen_btw09_voll_buchstabe.dta", replace